home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
007
/
bmgrep.arc
/
BMGREP.C
< prev
next >
Wrap
Text File
|
1986-12-09
|
27KB
|
918 lines
Relay-Version: version B 2.10.2 9/5/84; site philabs.UUCP
Path: philabs!cmcl2!harvard!talcott!panda!sources-request
From: sources-request@panda.UUCP
Newsgroups: mod.sources
Subject: bm version 1.2 (blindingly fast "fgrep")
Message-ID: <1424@panda.UUCP>
Date: 20 Feb 86 21:44:56 GMT
Date-Received: 21 Feb 86 23:14:56 GMT
Sender: jpn@panda.UUCP
Organization: Bell-Northern Research, Ottawa, Ontario
Lines: 896
Approved: jpn@panda.UUCP
Mod.sources: Volume 4, Issue 1
Submitted by: decvax!bnr-vpa!pdbain (Peter Bain)
This contains version 1.2 bm. Differences from 1.1 include certain
bug fixes, notably the one which prevented it from finding patterns
in lines which straddled buffer boundaries.
Please see the README for system dependencies.
********************** cut here *******************
#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
# bm.h
# bm.c
# Execute.c
# Extern.h
# GetPatFile.c
# Global.c
# MakeDesc.c
# MakeSkip.c
# MatchFound.c
# MkDescVec.c
# MoveResidue.c
# PrintLine.c
# PutUsage.c
# Search.c
# Makefile
# README
# bm.1
# This archive created: Thu Feb 13 09:21:33 1986
export PATH; PATH=/bin:$PATH
if test -f 'bm.h'
then
echo shar: over-writing existing file "'bm.h'"
fi
cat << \SHAR_EOF > 'bm.h'
#define FIRSTCHAR ' '
#define MAXCHAR 0377
#define MAXBUFF 8192
#define MAXSIZE 100
#define MAXPATS 100 /* max number of patterns */
#define PSIZEDEF 1024 /* space for patterns if they come from a tty */
#define min(x,y) ((x) < (y) ? (x) : (y))
#define max(x,y) ((x) > (y) ? (x) : (y))
struct PattDesc {
short unsigned int *Skip1, *Skip2; /* pointers to skip tables */
char *Pattern;
int PatLen; /* pattern length */
char *Start;
/* starting position of search (at beginning of pattern) */
int Success; /* true when pattern found */
};
SHAR_EOF
if test -f 'bm.c'
then
echo shar: over-writing existing file "'bm.c'"
fi
cat << \SHAR_EOF > 'bm.c'
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include "bm.h"
#include "Extern.h"
main(argc,argv)
int argc;
char *argv[];
{
/* grep based on Boyer-Moore algorithm */
char BigBuff[MAXBUFF + 2];
/*
* We leave one extra character at the beginning and end of the buffer,
* but don't tell Execute about it. This is so when someone is
* scanning the buffer and scans past the end (or beginning)
* we are still technically in the buffer (picky, but there ARE
* machines which would complain)
*/
int ret = 1, /* return code from Execute */
NotFound = 0, /* non-zero if file not readable */
NFiles,
NPats; /* number of patterns to search for */
char *FlagPtr,
**OptPtr; /* used to scan command line */
int TextFile /* file to search */;
struct PattDesc *DescVec[MAXPATS];
--argc;
OptPtr = argv + 1;
while ( argc && **OptPtr == '-') {
FlagPtr = *OptPtr + 1;
do {
switch (*FlagPtr) {
case 'c': cFlag = 1; break;
case 'e': eFlag = 1;
/* get the patterns from next arg */
NPats = MkDescVec(DescVec,*++OptPtr);
--argc;
break;
case 'f': fFlag = 1;
/* read the patterns from a file */
NPats = GetPatFile(*++OptPtr,DescVec);
--argc;
break;
case 'l': lFlag = 1; break;
case 'n': nFlag = 1; break;
case 's': sFlag = 1; break;
case 'x': xFlag = 1; break;
case 'h': hFlag = 1; break;
default:
fprintf(stderr,
"bm: invalid option: -%c \n",
*FlagPtr);
PutUsage();
exit(2);
} /* switch */
++FlagPtr;
} while (*FlagPtr);
++OptPtr; --argc;
} /* while */
/* OptPtr now points to patterns */
if (!fFlag && !eFlag) {
if (!argc) {
fprintf(stderr,"bm: no pattern specified\n");
PutUsage();
exit(2);
} else
NPats = MkDescVec(DescVec,*OptPtr);
++OptPtr; --argc;
}
/* OptPtr now points to first file */
NFiles = argc;
if (!NFiles)
ret &= Execute(DescVec,NPats,0,BigBuff+1);
else while (argc--) {
if ((NFiles > 1) || lFlag) FileName = *OptPtr;
if ((TextFile = open(*OptPtr,O_RDONLY,0)) < 0) {
fprintf(stderr,"bm: can't open %s\n",*OptPtr);
NotFound++;
}
else {
ret &= Execute(DescVec,NPats,TextFile,BigBuff+1);
if (sFlag && !ret)
exit(0);
close(TextFile);
} /* if */
++OptPtr;
} /* while */
if (cFlag) printf("%d\n",MatchCount);
exit(NotFound ? 2 : ret);
} /* main */
SHAR_EOF
if test -f 'Execute.c'
then
echo shar: over-writing existing file "'Execute.c'"
fi
cat << \SHAR_EOF > 'Execute.c'
#include <stdio.h>
#include "bm.h"
#include "Extern.h"
Execute(DescVec, NPats, TextFile, Buffer)
register struct PattDesc *DescVec[];
/* pointers to status vectors for the different
* patterns, including skip tables, position in buffer, etc. */
register int NPats; /* number of patterns */
register char Buffer[]; /* holds text from file */
register int TextFile; /* file to search */
{
int NRead, /* number of chars read from file */
NWanted, /* number of chars wanted */
NAvail, /* number of chars actually read */
BuffSize, /* number of chars in buffer */
BuffPos, /* offset of first char in Buffer in TextFile */
BuffEx, /* flag to indicate that buffer has been searched */
ResSize,
/* number of characters in the last, incomplete line */
Found, /* flag indicates whether pattern found
* completely and all matches printed */
Valid; /* was the match "valid", i.e. if -x used,
* did the whole line match? */
char *BuffEnd; /* pointer to last char of last complete line */
/* misc working variables */
int i;
/* initialize */
ResSize = 0;
Found = 0;
BuffPos = 0;
for (i=0; i < NPats; i++) {
DescVec[i] -> Success = 0;
DescVec[i] -> Start = Buffer;
} /* for */
/* now do the searching */
do {
/* first, read a bufferfull and set up the variables */
NWanted = MAXBUFF - ResSize; NRead = 0;
do {
NAvail =
read(TextFile,Buffer + ResSize + NRead, NWanted);
if (NAvail == -1) {
fprintf(stderr,
"bm: error reading from input file\n");
exit(2);
} /* if */
NRead += NAvail; NWanted -= NAvail;
} while (NAvail && NWanted);
BuffEx = 0;
BuffSize = ResSize + NRead;
BuffEnd = Buffer + BuffSize - 1;
/* locate the end of the last complete line */
while (*BuffEnd != '\n' && BuffEnd >= Buffer)
--BuffEnd;
if (BuffEnd < Buffer)
BuffEnd = Buffer + BuffSize - 1;
while (!BuffEx) { /* work through one buffer full */
BuffEx = 1; /* set it provisionally, then clear
* it if we find the buffer non-empty */
for (i=0; i< NPats; i++) {
if (!DescVec[i]->Success)
/* if the pattern has not been found */
DescVec[i]-> Success =
Search(DescVec[i]->Pattern,
DescVec[i]->PatLen, BuffEnd,
DescVec[i]->Skip1, DescVec[i]->Skip2,
DescVec[i]);
if (DescVec[i]->Success){
/* if a match occurred */
BuffEx = 0;
Valid = MatchFound(DescVec[i],BuffPos,
Buffer, BuffEnd);
Found |= Valid;
if ((sFlag || lFlag) && Found)
return(0);
} /* if */
} /* for */
} /* while */
if(NRead) {
ResSize = MoveResidue(DescVec,NPats,Buffer,
Buffer+BuffSize-1);
BuffPos += BuffSize - ResSize;
} /* if */
} while (NRead);
return(!Found);
} /* Execute */
SHAR_EOF
if test -f 'Extern.h'
then
echo shar: over-writing existing file "'Extern.h'"
fi
cat << \SHAR_EOF > 'Extern.h'
/* global flags for bm */
extern int cFlag, /* true if we want only a count of matching lines */
eFlag, /* indicates that next argument is the pattern */
fFlag, /* true if the patterns arew to come from a file */
lFlag, /* true if we want a list of files containing the pattern */
nFlag, /* true if we want the character offset of the pattern */
sFlag, /* true if we want silent mode */
xFlag, /* true if we want only lines which match entirely */
hFlag, /* true if we want no filenames in output */
MatchCount; /* count of number of times a search string was found
* in the text */
extern char *FileName;
SHAR_EOF
if test -f 'GetPatFile.c'
then
echo shar: over-writing existing file "'GetPatFile.c'"
fi
cat << \SHAR_EOF > 'GetPatFile.c'
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "bm.h"
int
GetPatFile(PatFile, DescVec)
char *PatFile;
struct PattDesc *DescVec[];
/* read patterns from a file and set up a pattern descriptor vector */
{
extern char *malloc();
FILE *PFile;
struct stat StatBuff;
int PatSize; /* the number of chars in all the patterns */
char *PatBuff; /* hold the patterns */
if (!(strlen(PatFile))) {
fprintf(stderr,"bm: no pattern file given\n");
exit(2);
} /* if */
if (!(PFile = fopen(PatFile,"r"))) {
fprintf(stderr,"bm: can't open pattern file %s\n",PatFile);
exit(2);
} /* if */
/* find out how big the patterns are */
if (fstat(fileno(PFile),&StatBuff) == -1) {
fprintf(stderr,"bm: can't fstat %s\n",PatFile);
exit(2);
} /* if */
if (isatty(fileno(PFile)))
PatSize = PSIZEDEF;
else PatSize = StatBuff.st_size;
if (!PatSize) {
fprintf(stderr,"bm: pattern file is empty\n");
exit(2);
} /* if */
if (!(PatBuff = malloc(PatSize+1))) {
fprintf(stderr,"bm: insufficient memory to store patterns\n");
exit(2);
} /* if */
fread(PatBuff,1,PatSize,PFile); /* get the patterns */
/* make sure the patterns are null-terminated. We can't have
* nulls in the patterns */
if (PatBuff[PatSize-1] == '\n')
PatBuff[PatSize-1] = '\0';
else
PatBuff[PatSize] = '\0';
fclose(PFile);
return(MkDescVec(DescVec,PatBuff));
} /* GetPatFile */
SHAR_EOF
if test -f 'Global.c'
then
echo shar: over-writing existing file "'Global.c'"
fi
cat << \SHAR_EOF > 'Global.c'
/* global flags for bm */
int cFlag=0, /* true if we want only a count of matching lines */
eFlag=0, /* indicates that next argument is the pattern */
fFlag=0, /* true if the patterns are to come from a file */
lFlag=0, /* true if we want a list of files containing the pattern */
nFlag=0, /* true if we want the character offset of the pattern */
sFlag=0, /* true if we want silent mode */
xFlag=0, /* true if we want only lines which match entirely */
hFlag=0, /* true if we want no filenames in output */
MatchCount=0; /* count of number of times a search string was found
* in the text */
char *FileName = 0;
SHAR_EOF
if test -f 'MakeDesc.c'
then
echo shar: over-writing existing file "'MakeDesc.c'"
fi
cat << \SHAR_EOF > 'MakeDesc.c'
#include <stdio.h>
#include "bm.h"
#include "Extern.h"
extern char * malloc();
/* makes a pattern descriptor */
struct PattDesc *MakeDesc(Pattern)
char *Pattern;
{
struct PattDesc *Desc;
Desc = (struct PattDesc *) malloc(sizeof(struct PattDesc));
if (!(Desc->Skip1 = (unsigned short int *)
malloc( sizeof(int) * (MAXCHAR + 1)))){
fprintf(stderr,"bm: can't allocate space\n");
exit(2);
} /* if */
if (!(Desc->Skip2 = (unsigned short int *)
malloc(sizeof(int) * strlen(Pattern)))){
fprintf(stderr,"bm: can't allocate space\n");
exit(2);
} /* if */
Desc->Pattern=Pattern;
Desc->PatLen = strlen(Desc->Pattern);
MakeSkip(Desc->Pattern,Desc->Skip1,
Desc->Skip2,Desc->PatLen);
return(Desc);
} /* main */
SHAR_EOF
if test -f 'MakeSkip.c'
then
echo shar: over-writing existing file "'MakeSkip.c'"
fi
cat << \SHAR_EOF > 'MakeSkip.c'
#include <stdio.h>
#include "bm.h"
extern char *malloc();
MakeSkip(Pattern,Skip1,Skip2,PatLen)
char Pattern[];
unsigned short int Skip1[], Skip2[];
int PatLen;
/* generate the skip tables for Boyer-Moore string search algorithm.
* Skip1 is the skip depending on the character which failed to match
* the pattern, and Skip2 is the skip depending on how far we got into
* the pattern. Pattern is the search pattern and PatLen is strlen(Pattern) */
{
int *BackTrack; /* backtracking table for t when building skip2 */
int c; /* general purpose constant */
int j,k,t,tp; /* indices into Skip's and BackTrack */
if (!(BackTrack = (int *) malloc(PatLen * (sizeof (int))))){
fprintf(stderr,"bm: can't allocate space\n");
exit(2);
} /* if */
for (c=0; c<=MAXCHAR; ++c)
Skip1[c] = PatLen;
for (k=0;k<PatLen;k++) {
Skip1[Pattern[k]] = PatLen - k - 1;
Skip2[k] = 2 * PatLen - k - 1;
} /* for */
for (j=PatLen - 1,t=PatLen;j >= 0; --j,--t) {
BackTrack[j] = t;
while (t<PatLen && Pattern[j] != Pattern[t]) {
Skip2[t] = min(Skip2[t], PatLen - j - 1);
t = BackTrack[t];
} /* while */
} /* for */
for (k=0;k<=t;++k)
Skip2[k] = min(Skip2[k],PatLen+t-k);
tp=BackTrack[t];
while(tp<PatLen) {
while(t<PatLen) {
Skip2[t] = min(Skip2[t],tp-t+PatLen);
++t;
} /* while */
tp = BackTrack[tp];
} /* while */
cfree(BackTrack);
} /* MakeSkip */
SHAR_EOF
if test -f 'MatchFound.c'
then
echo shar: over-writing existing file "'MatchFound.c'"
fi
cat << \SHAR_EOF > 'MatchFound.c'
#include <stdio.h>
#include "bm.h"
#include "Extern.h"
MatchFound(Desc, BuffPos, Buffer, BuffEnd)
struct PattDesc *Desc; /* state info about search for one string */
int BuffPos; /* offset of first char of buffer into the file being searched */
char *Buffer, /* pointer to the first character in the buffer */
*BuffEnd; /* pointer to the last character in the buffer */
{
char *MLineBegin, *MLineEnd;
Desc->Success = 0;
/* Start points to first character after a successful match */
MLineBegin = MLineEnd = Desc->Start - 1;
while(MLineBegin >=Buffer && *MLineBegin != '\n') --MLineBegin;
++MLineBegin;
while( MLineEnd <= BuffEnd && *MLineEnd != '\n') ++MLineEnd;
if (MLineEnd > BuffEnd) --MLineEnd;
/* fixed 25jun85 pdbain. suppress multiple matches of the same
* pattern on one line */
Desc->Start = MLineEnd + 1;
/* check if exact match */
if (xFlag && !( Desc->PatLen == (*MLineEnd != '\n' ? ((MLineEnd -
MLineBegin) + 1) : (MLineEnd - MLineBegin))))
return(0); /* failure */
if (sFlag) return(1);
if (cFlag) {
++MatchCount;
return(1);
} /* if */
PrintLine(BuffPos+(Desc->Start-Buffer),MLineBegin,MLineEnd);
return(1);
} /* MatchFound */
SHAR_EOF
if test -f 'MkDescVec.c'
then
echo shar: over-writing existing file "'MkDescVec.c'"
fi
cat << \SHAR_EOF > 'MkDescVec.c'
#include "bm.h"
#include <string.h>
/* scan a newline-separated string of patterns and set up the
* vector of descriptors, one pattern descriptor per pattern.
* Return the number of patterns */
int
MkDescVec(DescVec, Pats)
struct PattDesc *DescVec[];
char *Pats;
{
int NPats = 0;
char *EndPat;
extern struct PattDesc *MakeDesc();
/* some systems use "strchr" instead of "index" */
/* while (*Pats && (EndPat = index(Pats,'\n')) && NPats < MAXPATS) { */
while (*Pats && (EndPat = strchr(Pats,'\n')) && NPats < MAXPATS) {
*EndPat = '\0';
DescVec[NPats] = MakeDesc(Pats);
Pats = EndPat + 1;
++NPats;
} /* while */
if (*Pats && NPats < MAXPATS) {
DescVec[NPats] = MakeDesc(Pats);
++NPats;
} /* if */
return(NPats);
} /* MkDescVec */
SHAR_EOF
if test -f 'MoveResidue.c'
then
echo shar: over-writing existing file "'MoveResidue.c'"
fi
cat << \SHAR_EOF > 'MoveResidue.c'
#include "bm.h"
/* Moves the text which has not been completely searched at the end of
* the buffer to the beginning of the buffer
* and returns number of bytes moved */
int MoveResidue(DescVec, NPats,Buffer, BuffEnd)
struct PattDesc *DescVec[];
int NPats;
char *Buffer, *BuffEnd;
{
char *FirstStart, *Residue;
/* use this declaration if you don't use "bcopy" */
register char *f, *t;
int ResSize, i;
FirstStart = BuffEnd;
/* find the earliest point which has not been
* completely searched */
for (i=0; i < NPats; i++) {
FirstStart =
min(FirstStart, DescVec[i]-> Start );
} /* for */
/* now scan to the beginning of the line containing the
* unsearched text */
for (Residue = FirstStart; *Residue != '\n' &&
Residue >= Buffer; Residue--);
if (Residue < Buffer)
Residue = FirstStart;
else ++Residue;
ResSize = (BuffEnd - Residue + 1);
/* now move the residue to the beginning of
* the file */
/* use this if you don't have "bcopy" */
t = Buffer; f = Residue;
for(i=ResSize;i;--i)
*t++ = *f++;
/* use this if you do have "bcopy"
bcopy(Residue, Buffer, ResSize);
*/
/* now fix the status vectors */
for (i=0; i < NPats; i++) {
DescVec[i]->Start -= (Residue - Buffer );
} /* for */
return(ResSize);
} /* MoveResidue */
SHAR_EOF
if test -f 'PrintLine.c'
then
echo shar: over-writing existing file "'PrintLine.c'"
fi
cat << \SHAR_EOF > 'PrintLine.c'
#include <stdio.h>
#include <string.h>
#include "Extern.h"
PrintLine(OffSet,LineStart,LineEnd)
int OffSet; /* offset of LineStart from beginning of file */
char *LineStart,
*LineEnd;
{
char OffStr[80];
if (lFlag) {
if (strlen(FileName) > 76) {
fprintf(stderr,"bm: filename too long\n");
exit(2);
} /* if */
if (strlen(FileName)) {
sprintf(OffStr,"%s\n",FileName);
write(1,OffStr,strlen(OffStr));
} /* if */
return;
} /* if */
if (FileName && !hFlag) {
if (strlen(FileName) > 76) {
fprintf(stderr,"bm: filename too long\n");
exit(2);
} /* if */
sprintf(OffStr,"%s:",FileName);
write(1,OffStr,strlen(OffStr));
} /* if */
if (nFlag) {
sprintf(OffStr,"%d: ",OffSet);
write(1,OffStr,strlen(OffStr));
} /* if */
write(1,LineStart,LineEnd-LineStart+1);
if (*LineEnd != '\n') write (1,"\n",1);
} /* PrintLine */
SHAR_EOF
if test -f 'PutUsage.c'
then
echo shar: over-writing existing file "'PutUsage.c'"
fi
cat << \SHAR_EOF > 'PutUsage.c'
#include <stdio.h>
PutUsage()
{
fprintf(stderr,
"bm: search for a given string or strings in a file or files\n");
fprintf(stderr,
"synopsis: bm [option]* [string(s)] [file]*\n");
fprintf(stderr,
"options:\n");
fprintf(stderr,
"-c print only a count of matching lines \n");
fprintf(stderr,
"-e Take next argument as the pattern\n");
fprintf(stderr,
"-f PFile read patterns from a file PFile\n");
fprintf(stderr,
"-h Do not print file names\n");
fprintf(stderr,
"-l print a list of files containing the pattern(s) \n");
fprintf(stderr,
"-n print the character offset of the pattern(s) \n");
fprintf(stderr,
"-s silent mode. Return only success (0) or failure (1)\n");
fprintf(stderr,
"-x print only lines which match entirely \n");
} /*PutUsage */
SHAR_EOF
if test -f 'Search.c'
then
echo shar: over-writing existing file "'Search.c'"
fi
cat << \SHAR_EOF > 'Search.c'
#include "bm.h"
#include "Extern.h"
int Search(Pattern,PatLen,EndBuff, Skip1, Skip2, Desc)
register char Pattern[];
int PatLen;
char *EndBuff;
unsigned short int Skip1[], Skip2[];
struct PattDesc *Desc;
{
register char *k, /* indexes text */
*j; /* indexes Pattern */
register int Skip; /* skip distance */
char *PatEnd,
*BuffEnd; /* pointers to last char in Pattern and Buffer */
BuffEnd = EndBuff;
PatEnd = Pattern + PatLen - 1;
k = Desc->Start;
Skip = PatLen-1;
while ( Skip <= (BuffEnd - k) ) {
j = PatEnd;
k = k + Skip;
while (*j == *k) {
if (j == Pattern) {
/* found it. Start next search
* just after the pattern */
Desc -> Start = k + Desc->PatLen;
return(1);
} /* if */
--j; --k;
} /* while */
Skip = max(Skip1[*(unsigned char *)k],Skip2[j-Pattern]);
} /* while */
Desc->Start = k+Skip-(PatLen-1);
return(0);
} /* Search */
SHAR_EOF
if test -f 'Makefile'
then
echo shar: over-writing existing file "'Makefile'"
fi
cat << \SHAR_EOF > 'Makefile'
CCFLAGS = -O
SOURCES = bm.h bm.c Execute.c Extern.h\
GetPatFile.c Global.c MakeDesc.c MakeSkip.c \
MatchFound.c \
MkDescVec.c MoveResidue.c PrintLine.c PutUsage.c Search.c
OBJECTS = bm.o Execute.o \
GetPatFile.o Global.o MakeDesc.o MakeSkip.o \
MatchFound.o \
MkDescVec.o MoveResidue.o Search.o PrintLine.o PutUsage.o
BASEFILES = $(SOURCES) Makefile README bm.1
bm: $(OBJECTS)
cc -s -o bm $(CCFLAGS) $(OBJECTS)
install: bm
rm -f /usr/bin/bm
cp bm /usr/bin/bm
chmod ugo-w /usr/bin/bm
# rm /usr/src/public/bm/*
# cp $(BASEFILES) /usr/src/public/bm
shar:
/usr/local/bin/shar $(BASEFILES) >bm.shar
man: /usr/man/man1/bm.1
/usr/man/man1/bm.1: bm.1
rm -f /usr/man/man1/bm.1
cp bm.1 /usr/man/man1/bm.1
man bm > /dev/null
bm.o: bm.c bm.h Extern.h
cc -c $(CCFLAGS) bm.c
PutUsage.o: PutUsage.c bm.h
cc -c $(CCFLAGS) PutUsage.c
MakeSkip.o: MakeSkip.c bm.h
cc -c $(CCFLAGS) MakeSkip.c
Search.o: Search.c bm.h Extern.h
cc -c $(CCFLAGS) Search.c
Execute.o: Execute.c bm.h
cc -c $(CCFLAGS) Execute.c
MoveResidue.o: MoveResidue.c bm.h Extern.h
cc -c $(CCFLAGS) MoveResidue.c
MatchFound.o: MatchFound.c bm.h Extern.h
cc -c $(CCFLAGS) MatchFound.c
PrintLine.o: PrintLine.c Extern.h
cc -c $(CCFLAGS) PrintLine.c
MkDescVec.o: MkDescVec.c bm.h
cc -c $(CCFLAGS) MkDescVec.c
GetPatFile.o: GetPatFile.c bm.h
cc -c $(CCFLAGS) GetPatFile.c
MakeDesc.o: MakeDesc.c bm.h
cc -c $(CCFLAGS) MakeDesc.c
Global.o: Global.c
cc -c $(CCFLAGS) Global.c
listing:
# use -o for Sys V, -i for 4.2BSD
# print -i3 $(BASEFILES)
print -o3 $(BASEFILES)
clean:
rm -f *.o a.out foo bar blat junk core
SHAR_EOF
if test -f 'README'
then
echo shar: over-writing existing file "'README'"
fi
cat << \SHAR_EOF > 'README'
Bm is a fast pattern matching utility, intended to be almost
identical in functionality to fgrep (ugh!) but much faster. It uses
the Boyer-Moore algorithm, as described in the papers listed below:
D.E. Knuth, J.H. Morris, V.R. Pratt,"Fast Pattern Matching in Strings",
SIAM J. Comput., 6(2), June 1977, 323-350,
Z. Galil,
"On Improving the Worst Case Running Time of the Boyer-Moore String
Matching Algorithm",
CACM, 22(9), Sept. 1979, ACM,
R.S. Boyer, J.S. Moore,"A Fast String Searching Algorithm", CACM, 20(10),
Oct. 1977, 762-772,
G. de V. Smit,"A Comparison of Three String Matching Algorithms",
Software - Practice and Experience, vol. 12, 1982, 57-66,
*** NOTE *** There are certain system dependencies in the code.
Please check whether your system uses "index" or "strchr" to
find a character in a string: this affects MkDescVec.c.
Also check whether your system uses <strings.h> or <string.h>.
This affects match.c/bm.c, MkDescVec.c, and PrintLine.c.
Also check whether your system has "bcopy". If so, see MoveResidue.c
The files are MkDescVec.c, PrintLine.c, bm.c, and
Execute.c: search a file for the patterns
Extern.h: declarations of externs
GetPatFile.c: read in patterns from a file and set up a vector of
pattern descriptors
Global.c: global variables (complement to Extern.h)
MakeDesc.c: create a pattern descriptor for one pattern, including
skip tables, etc.
MakeSkip.c: make the skip tables for one pattern
Makefile: you can figure this one out for yourself
MatchFound.c: what to do when you actually FIND a pattern - print it,
update flags, etc.
MkDescVec.c: make a vector of pattern descriptors, given a string
of newline-separated patterns
MoveResidue.c: when you come to the end of the buffer, move the
unsearched "residue" to the beginning and start again
PrintLine.c: print the appropriate stuff after finding a match
PutUsage.c: mini-man page.
README: this file
Search.c: the guts. Implements B-M algorithm given a pattern, skip
tables for the pattern, and a buffer to search
bm.c: mainline. mostly interpreting the command line and tidying
up at the end. Calls Execute for each file.
bm.h: constants
bm.p: man page
SHAR_EOF
if test -f 'bm.1'
then
echo shar: over-writing existing file "'bm.1'"
fi
cat << \SHAR_EOF > 'bm.1'
.TH BM (1) "8 July 1985"
.UC 4
.SH NAME
bm \- search a file for a string
.SH SYNOPSIS
.B /usr/bin/bm
[ option ] ...
[ strings ]
[ file ]
.SH DESCRIPTION
.I Bm
searches the input
.I files
(standard input default) for lines matching a string.
Normally, each line found is copied to the standard output.
It is blindingly fast.
.I Bm
strings are fixed sequences of characters:
there are no wildcards, repetitions, or other features
of regular expressions.
Bm is also case sensitive.
The following options are recognized.
.TP
.B \-x
(Exact) only lines matched in their entirety are printed
.TP
.B \-l
The names of files with matching lines are listed (once) separated by newlines.
.TP
.B \-c
Only a count of the number of matches
is printed
.TP
.B \-e "string"
The string is the next argument after the
.B \-e
flag. This allows strings beginning with '-'.
.TP
.B \-h
No filenames are printed, even if multiple files are searched.
.TP
.B \-n
Each line is preceded by the number
of characters from the beginning of the file
to the match.
.TP
.B \-s
Silent mode. Nothing is printed (except error messages).
This is useful for checking the error status.
.TP
.BI \-f " path"
The string list
is taken from the
.I path.
This may be either a file or a tty.
.LP
Unless the
.B \-h
option is specified
the file name is shown if there is more than one input file.
Care should be taken when using the characters $ * [ ^ | ( ) and \\ in the
.I strings
(listed on the command line)
as they are also meaningful to the Shell. It is safest to enclose the entire
.I expression
argument in single quotes \' \'.
.LP
.I Bm
searches for lines that contain one of the (newline-separated)
.I strings,
using
the Boyer-Moore algorithm.
It is far superior in terms of speed to the grep (egrep, fgrep)
family of pattern matchers for fixed-pattern searching,
and its speed increases with pattern length.
.SH "SEE ALSO"
grep(1)
.SH DIAGNOSTICS
Exit status is 0 if any matches are found,
1 if none, 2 for syntax errors or inaccessible files.
.SH AUTHOR
Peter Bain (pdbain@bnr-vpa), with modifications suggested by John Gilmore
and Amir Plivatsky
.SH BUGS
Only 100 patterns are allowed.
.LP
Patterns may not contain newlines.
.LP
If a line (delimited by newlines, and the beginning and end of the file)
is longer than 8000 charcters (e.g. in a core dump),
it will not be completely printed.
.LP
If multiple patterns are specified, the order of the ouput lines is not
necessarily the same as the order of the input lines.
.LP
A line will be printed once for each different string on that line.
.LP
The algorithm cannot count lines.
.LP
The
.B -n
and
.B -c
work differently from fgrep.
.LP
The
.B -v,
.B -i,
and
.B -b
are not available.
SHAR_EOF
# End of shell archive
exit 0
% t argument after the
.B \-e
flag. This allows strings beginning with '-'.
.TP
.B \-h
No filenames are printed,